User blog:QuickNick/New Series Tables
__TOC__ Series Amateur NASCAR Pro/Am Pro Expert Master Elite Legend Exclusive Series Power Query Extract I realise that the change to the format of the Series tables will cause problems for those who extract the data with Power Query so here is an example script ... //---------------------------------------------------------------- // query to load Nick's new career page group tables //---------------------------------------------------------------- let // load the career web page into a table of career groups strPage = "http://rr3.wikia.com/wiki/Series_and_Special_Events", tblCareerPage = Web.Page(Web.Contents(strPage)), //---------------- funcLoadGroup = (GrpKey, Group) => let // load the group table tblLoad = Table.SelectRows(tblCareerPage, each Text.Upper(Caption) = Text.Upper(Group) or Text.Upper(Caption) = Text.Upper(Group & "Edit") ){0}Data, // determine if table includes single-cell-style special series tblWork = if Table.HasColumns(tblLoad, "Series (Abbr.)") then // no: treat as normal Table.RenameColumns( tblLoad ,{ {"Index", "IndexVer"} ,{"Series (Abbr.)", "SeriesAbbr"} ,{"Cars", "Car1"} ,{"Cars2", "Car2"} ,{"Cars3", "Car3"} ,{"Cars4", "Car4"} }) else // yes: promote first row as column headers Table.RenameColumns( Table.PromoteHeaders( tblLoad ,PromoteAllScalars=true) ,{ {"Index", "IndexVer"} ,{"Series (Abbr.)", "SeriesAbbr"} ,{"Cars", "Car1"} ,{"Cars_1", "Car2"} ,{"Cars_2", "Car3"} ,{"Cars_3", "Car4"} }) , // process the table tblGroup = // 39. add the group key and group name Table.AddColumn( Table.AddColumn( // 33. group the series rows Table.Group( // 32. expand the cars sub-table Table.ExpandTableColumn( // 31. create sub-table of cars Table.AddColumn( // 22. add the series and abbreviation columns Table.AddColumn( Table.AddColumn( // 21. split the series and abbreviation Table.AddColumn( // 14. set the version Table.AddColumn( // 13. set the special flag Table.AddColumn( // 12. set the index Table.AddColumn( // 11. split the index, special flag and version number Table.AddColumn( // 01. set the column types Table.TransformColumnTypes( // 00. use the above work table tblWork // 01. ,{ {"IndexVer" ,type text} ,{"SeriesAbbr", type text} ,{"Car1", type text} ,{"Car2", type text} ,{"Car3", type text} ,{"Car4", type text} }) // 11. ,"_tmpIndexSpecialVer", each funcSetIndexSpecialVersion(IndexVer) ) // 12. ,"SerIndex", each _tmpIndexSpecialVerstrIndex ) // 13. ,"SerSpecial", each ( if SeriesAbbr = Car1 or _tmpIndexSpecialVerflagSpecial = "Y" then "Y" else "N" ) ) // 14. ,"SerVer", each _tmpIndexSpecialVerstrVersion ) // 21. ,"_recSeriesAbbr", each funcSplitSeriesAbbr(SeriesAbbr) ) // 22. ,"Series", each _recSeriesAbbrSeries) ,"SerAbbr", each _recSeriesAbbrSerAbbr) // 31. ,"tblCars", each funcCarsToTable(SerSpecial, SeriesAbbr, Car1, Car2, Car3, Car4) ) // 32. ,"tblCars", {"Car"}) // 33. ,{"SerIndex", "SerAbbr", "Series", "SerVer", "SerSpecial"} ,{ {"tblCars", each Table.AddIndexColumn( Table.SelectColumns(_, {"Car"}) ,"CarSortKey", 1, 1) , type table} }) // 39. ,"GrpKey", each GrpKey) ,"Group", each Group) // done! in tblGroup, //---- funcSetIndexSpecialVersion = (strIndexVer) => /* IndexVer column lines 1. Index 2. * = Special Series 3. (Version) But a line will only be there if there is something on the line! */ let // replace the carriage-returns AND line-feeds with hash characters (which shouldn't be there) strReplace = Text.Replace(Text.Replace(strIndexVer,"#(cr)","#"),"#(lf)","#"), // split into a list of parts lstSplit = Text.Split(strReplace, "#"), // determine the details and return them in a record recOut = [ // set the index strIndex = if lstSplit{0} = "-" or lstSplit{0} = "*" or Text.Start(lstSplit{0}, 1) = "(" then null else lstSplit{0}, // check for the Special Series * asterisk and set the flag flagSpecial = if List.Contains(lstSplit,"*") then "Y" else "N", // check for the Version bracket and set the Version strVersion = if List.MatchesAny(lstSplit, each Text.Contains(_, "(") ) then [ // use a record to effectively do a multi-line if-else statement! tmpVersion = Text.Replace( Text.Replace( List.Select(lstSplit, each Text.Contains(_, "(") ){0} ,"(", "") ,")", "") , strVersion = if tmpVersion = "?" then null else tmpVersion ]strVersion // return strVersion from the above record defintion else null ] in recOut, //---- funcSplitSeriesAbbr = (strSeriesAbbr) => let posBracket = Text.PositionOf(strSeriesAbbr, "(", Occurrence.Last), recSeriesAbbr = if posBracket >= 0 then [ Series = Text.Start(strSeriesAbbr, posBracket), SerAbbr = Text.Replace(Text.Range(strSeriesAbbr, posBracket + 1), ")", "") ] else [ Series = strSeriesAbbr, SerAbbr = null ] in recSeriesAbbr, //---- funcCarsToTable = (flagSpecial, seriesAbbr, car1, car2, car3, car4) => if Text.Upper(flagSpecial) = "Y" and seriesAbbr = car1 then null else Table.SelectRows( #table({"Car"}, { {car1}, {car2}, {car3}, {car4} }) ,each Car <> null and Car <> "") , //---------------- // use above function to load all series and cars and combine into row entries for each series with a sub-table of cars // then carry out the steps below which must be done on the data-set as a whole e.g. add the series key // expand the data so there's a separate row for each group-series-car combination //-------- // TESTING //---- // tblSeries = funcLoadGroup(0, "Specials") // tblSeries = funcLoadGroup(1, "Amateur") // tblSeries = funcLoadGroup(2, "NASCAR") // tblSeries = funcLoadGroup(3, "Pro/Am") // tblSeries = funcLoadGroup(4, "Pro") // tblSeries = funcLoadGroup(5, "Expert") // tblSeries = funcLoadGroup(6, "Master") // tblSeries = funcLoadGroup(7, "Elite") // tblSeries = funcLoadGroup(8, "Legend") // tblSeries = funcLoadGroup(9, "Exclusive Series") //-------- /* to test the above group loads just wrap the section below in these comment characters and remove the // from the group line you wish to test */ tblSeries = // 99. return the relevant columns in the desired order Table.SelectColumns( // 42. expand the cars sub-table Table.ExpandTableColumn( // 41. add a series key Table.AddIndexColumn( // 00. load the series using the above function // this isn't ideal as I've had to hard-code the group numbers and names ... but it works! funcLoadGroup(1, "Amateur") & funcLoadGroup(2, "NASCAR") & funcLoadGroup(3, "Pro/Am") & funcLoadGroup(4, "Pro") & funcLoadGroup(5, "Expert") & funcLoadGroup(6, "Master") & funcLoadGroup(7, "Elite") & funcLoadGroup(8, "Legend") & funcLoadGroup(9, "Exclusive Series") // 41. ,"SerKey", 1, 1) // 42. ,"tblCars", {"Car", "CarSortKey"}) // 99. ,{"GrpKey", "Group", "SerKey", "SerIndex", "SerAbbr", "Series", "SerVer", "SerSpecial", "Car", "CarSortKey"}) // done! in tblSeries //---------------------------------------------------------------- Multiple commands can be combined into a single line of code strOut = Text.Trim(Text.Trim(strIn, "(" ), ")" ), with the inner-most being executed first. Something to consider/remember about PQ is that every line of code has to assign a result to something and once assigned it cannot be changed. I see no point in creating a lot of unnecessary instances, having to use those awful names and then never using them again! So I combine lines of code where (I think) it is logical to do so. To make it easier (for me) to understand long statements I number each command-parameters combination. It's also much easier to simply comment out lines of code rather than doing that AND having to rename the next instance to refer to the last not-commented-out instance! The script makes use of inline functions, for those who don’t know these can be *Single-line functions e.g. **'funcCarsToTable' is a single line of code *Multi-line functions which must be wrapped in an additional let ... in construct e.g. **'funcLoadGroup' **'funcSetIndexSpecialVersion' **'funcSplitSeriesAbbr' Comments can appear anywhere ... *'//' is a single-line comment, the rest of the line is a comment. *'/* ... */ ' can be a short embedded comment or a massive multi-line comment, everything inside is ignored. // comment strGroup = "Amateur", // comment strSeries /* comment */ = "Racing School Basics", /* Comment-1 Comment-2 */ Some simple but important points ... *''Read the points below and then review the script above, it will make sense!.'' *All scripts and multi-line functions start with let and end with in. **You can NOT have any code outside of the script's let...in. *The last command/statement before the in (or in a [ record ] definition) must NOT end with a comma. **Functions are treated as a single statement so they have to end with a comma. *A function can return only one thing BUT that can be anything (e.g. a list, a record, a table, etc) so it is possible to return multiple values. *Each if must have a corresponding else ... remember, every command has to assign a value to something, so you can't omit the else part! *The if and else can only have one statement and that is assigning a value BUT that can be a call to a function (which may include multiple statements) OR (something I worked out) you can wrap a series of statements inside a [ record ] construct ... I have included one of these in the above script to show how it works. Feel free to contact me if you need help. Nick